# -*- coding:utf-8 -*-
from spider.parse import get
from Data.Read_and_Write import Redis_Pop
from lxml import etree


def _get_cate_url(kw):  # Fetch a company-category record from Redis
    """Pop one entry from Redis under ``kw`` and decode it into a Python object.

    Args:
        kw: Key handed unchanged to ``Redis_Pop``.

    Returns:
        The decoded Python literal, or ``None`` when ``Redis_Pop`` returns
        ``None`` or the payload is the literal text ``None``. The caller
        indexes the result with ``'Cate_two_url'``, so the payload is
        presumably a dict repr -- confirm against the producer side.

    Raises:
        ValueError, SyntaxError: If the payload is not a valid Python literal.
    """
    import ast  # function-local so the module's import block stays untouched

    res = Redis_Pop(kw)
    if res is None:
        # Nothing was popped: report "no work left" instead of letting the
        # parser fail with a TypeError on a non-string argument.
        return None
    if isinstance(res, bytes):
        # literal_eval only parses str; presumably the Redis client may hand
        # back raw bytes -- TODO confirm the client's decode settings.
        res = res.decode('utf-8')
    # NOTE(security): this used to be eval(), which would execute any
    # expression stored in the queue. literal_eval accepts only literals
    # (dict/list/str/numbers/None/...). If the producer ever stores something
    # that is not a plain literal, this now raises ValueError instead.
    return ast.literal_eval(res)


def _name_check(name):
    """Return ``str(name)`` if it looks like a company name, else ``None``.

    A name qualifies when its string form contains either of the two
    company-suffix markers below. Non-string input (including ``None``)
    is stringified first, exactly as given.
    """
    markers = ('有限公司', '责任公司')
    text = str(name)
    if not any(marker in text for marker in markers):
        return None
    return text


def get_detail_url(kw):  # Collect company detail-page links
    """Pop one category record for ``kw`` and print its company detail links.

    The record's ``'Cate_two_url'`` page is fetched with ``get`` and parsed
    with lxml. For every matching anchor whose text passes ``_name_check``
    and which carries an ``href``, the record is updated in place with
    ``'company_name'`` / ``'company_url'`` and printed. The same dict is
    reused (overwritten) for each anchor.

    When no record is available, ``'Finish all the task'`` is printed instead.

    Args:
        kw: Key forwarded to ``_get_cate_url``.

    Returns:
        None.
    """
    item = _get_cate_url(kw)
    if item is None:
        # Nothing left in the queue.
        print('Finish all the task')
        return

    page = etree.HTML(get(item['Cate_two_url']))
    anchors = page.xpath('//div[@class="mach_list2"]/form[@name="jubao"]/dl/dt/h4/a')
    for anchor in anchors:
        # First text node of the anchor is treated as the company name.
        texts = anchor.xpath('./text()')
        company_name = _name_check(texts[0] if texts else None)
        if company_name is None:
            continue

        hrefs = anchor.xpath('./@href')
        if not hrefs:
            continue

        item['company_name'] = company_name
        item['company_url'] = hrefs[0]
        print(item)


if __name__ == '__main__':
    # Script entry point: process one queued category record for 'hainan'.
    get_detail_url('hainan')
